# Notes for formatting:
# include = FALSE prevents code and results from appearing in the finished file. R Markdown still runs the code in the chunk, and the results can be used by other chunks.
# echo = FALSE prevents code, but not the results from appearing in the finished file. This is a useful way to embed figures.
# message = FALSE prevents messages that are generated by code from appearing in the finished file.
# warning = FALSEprevents warnings that are generated by code from appearing in the finished.
library(data.table)
library(readr)
library(readxl)
library(stargazer)
library(tidyverse)

This problem set is due on October 26. I strongly recommend to start working on the homework early. You can work in pairs and submit a common solution. Please submit the homework as an R markdown file (if there are data files, they put all the files in a zip file). The code must run without errors. To make this easier, set the working directory at the beginning so it can be easily changed by someone else running the code.

For this exercise you will use a dataset collected by S&P Global. Choose one of the datasets available, which have data for either 2009 or 2018 for one of the following ISOs: MISO, PJM, ERCOT, or New England ISO. The goal of the exercise is to build the supply curve for a wholesale electricity market and to analyze how costs determine the composition of fuels and emissions. We will also use the exercise to see how things would change with a carbon tax.

Select the following variables from your dataset:

ercot2018 <- fread("data/ercot2018.csv")
ercot2009 <- fread("data/ERCOT_2009.csv")
ne2009 <- read_excel("data/ISO_NE_2009.xls", 
         col_types = c("text", "numeric", "numeric", 
         "text", "text", "text", "text", "numeric", 
         "numeric", "numeric", "numeric", 
         "numeric", "numeric", "numeric", 
         "numeric", "numeric", "numeric", 
         "numeric", "numeric", "numeric", 
         "numeric", "numeric", "numeric", 
         "numeric", "text", "text", "text", 
         "text", "text", "text", "text", "text", 
         "text", "text"))
## New names:
## * `` -> ...1
ne2009 <- as.data.table(ne2009)
miso2018 <- fread("data/MISO2018.csv")
miso2018[,`Operating Status` := "Operating"]
ne2018 <- fread("data/NEISO_2018.csv")
ny2018 <- fread("data/NYISO_2018.csv") 
pjm2009 <- fread("data/PJM2009.csv", skip = 31)

# Get all the data in one list and name it
alldata <- list(ercot2018, ercot2009,ne2009,miso2018,ne2018,ny2018, pjm2009)
names(alldata) <- c("ercot2018", "ercot2009","ne2009","miso2018","ne2018","ny2018", "pjm2009")

# Get a unique list of all the column names
oldcols <- unique(unlist(sapply(alldata, names)))

# Map each of these to the variable name I want
newcols <- c('name','pkey','pukey','unitno','fueltype','tech','operating','capacity','capacity_adj','capacity_cum','vcost_om', 'vcost_fuel','vcost_om2','vcost_om3','vcost_emissions','fcost','tcost_om','heat_rate','heat_input','net_generation','cap_factor','nox','sox','co2','cogen','gscfuelsrc','gscnonfuelsrc','gscallowsrc','gscheatratesrc', 'gscheatinputsrc','gscgensrc','nox_emratesrc','so2_emratesrc','co2_emratesrc','name')

# Print to see that these match up.
names(newcols) <- oldcols
newcols
##                               Plant Name 
##                                   "name" 
##                       MI Power Plant Key 
##                                   "pkey" 
##                  MI Power Plant Unit Key 
##                                  "pukey" 
##                                  Unit No 
##                                 "unitno" 
##                        Primary Fuel Type 
##                               "fueltype" 
##                    Generation Technology 
##                                   "tech" 
##                         Operating Status 
##                              "operating" 
##                     Summer Capacity (MW) 
##                               "capacity" 
##            Adjusted Summer Capacity (MW) 
##                           "capacity_adj" 
##          Cumulative Summer Capacity (MW) 
##                           "capacity_cum" 
##               Variable O&M Costs ($/MWh) 
##                               "vcost_om" 
##                 Total Fuel Costs ($/MWh) 
##                             "vcost_fuel" 
##      Non-Fuel Variable O&M Costs ($/MWh) 
##                              "vcost_om2" 
## Non-FuelNon-AllowanceVariable O&M($/MWh) 
##                              "vcost_om3" 
##         Emission Allowance Costs ($/MWh) 
##                        "vcost_emissions" 
##               Fixed O&M Cost ($/kW-Year) 
##                                  "fcost" 
##     Total Operation & Maint Cost ($/MWh) 
##                               "tcost_om" 
##                      Heat Rate (Btu/kWh) 
##                              "heat_rate" 
##                       Heat Input (MMBtu) 
##                             "heat_input" 
##                     Net Generation (MWh) 
##                         "net_generation" 
##                      Capacity Factor (%) 
##                             "cap_factor" 
##           NOX Emissions Rate (lbs/MMBtu) 
##                                    "nox" 
##           SO2 Emissions Rate (lbs/MMBtu) 
##                                    "sox" 
##           CO2 Emissions Rate (lbs/MMBtu) 
##                                    "co2" 
##                             Cogenerator? 
##                                  "cogen" 
##                     GSC Fuel Cost Source 
##                             "gscfuelsrc" 
##                 GSC Non Fuel Cost Source 
##                          "gscnonfuelsrc" 
##                GSC Allowance Cost Source 
##                            "gscallowsrc" 
##                     GSC Heat Rate Source 
##                         "gscheatratesrc" 
##                    GSC Heat Input Source 
##                        "gscheatinputsrc" 
##                    GSC Generation Source 
##                              "gscgensrc" 
##                NOX Emissions Rate Source 
##                          "nox_emratesrc" 
##                SO2 Emissions Rate Source 
##                          "so2_emratesrc" 
##                CO2 Emissions Rate Source 
##                          "co2_emratesrc" 
##                                     ...1 
##                                   "name"
# Now loop through all the dts and set the new names and only choose the columns we want. 
format <- function (dt){
  # Set the new names for each column
  setnames(dt,oldcols, newcols, skip_absent = T)
  return(dt)
}

# Apply the above fucntion
alldata <- lapply(alldata, format)

Part 1

Start by cleaning and understanding your data. For this, do the following: #### What does each variable represent? Just check understanding here:

Assign convenient yet meaningful names to each variable in the dataset.

# Get the list of columns we want to select
wantcols <- c("pukey",'fueltype',"tech","capacity","vcost_om","vcost_fuel","vcost_emissions","fcost","heat_rate","heat_input","net_generation","cap_factor","nox","sox","co2","operating")

selec <- function (dt){
  # Select only the columns we want
  dt <- dt[,..wantcols]
  return(dt)
}

alldata <- lapply(alldata, selec)

# Print the top couple rows of each to see that it worked. 
lapply(alldata, dim) # Check there are 15 columns
## $ercot2018
## [1] 977  16
## 
## $ercot2009
## [1] 776  16
## 
## $ne2009
## [1] 1300   16
## 
## $miso2018
## [1] 3475   16
## 
## $ne2018
## [1] 1791   16
## 
## $ny2018
## [1] 1257   16
## 
## $pjm2009
## [1] 3056   16

What is the class of each variable? Make sure to convert them to the proper class before doing this. For example, if net generation is a character, make it numeric.

# ANSWER KEY ONLY: I'll join all the data to be one data type in order not use loops 
# all the time for readability, and filter at the end to make it work for each graph. 
dt <- rbindlist(alldata, use.names = TRUE, idcol = 'isoyear')
# Capture the iso (as many characters as you like from a-z at the beginning)
dt[,iso:=str_match(isoyear,"^[a-z]*")] 
# Capture the year (4 digits from 0-9)
dt[,year:=as.integer(str_match(isoyear,"[0-9]{4}$"))] 

# Select the ones that I want to change the type of using a regular expression
convert_cols <- names(dt)[grepl('cost|heat|gen|cap',names(dt))]

# Some data cleaning needed before changing to numeric. 
# gsub isn't vectorized, so can't do this in data.table. 
for (col in convert_cols){
  # This is needed for all except ne2009
  # There are some commas that separate numbers that result in nas e.g. 100,000
  dt[[col]] <- gsub(",","",dt[[col]]) 
  # For pjm 2009 only because 0s are entered as "(0.00)"
  dt[[col]] <- gsub("\\(","",dt[[col]])
  dt[[col]] <- gsub("\\)","",dt[[col]])
}

# error checking why 
error <- dt[,as.numeric(vcost_emissions)]
#dt[is.na(error)]

# After the loop of checking that I'm not deleting data
dt<-dt[!is.na(error)]

# Convert the columns to the right datatype. 
dt[, (convert_cols) := lapply(.SD, as.numeric),.SDcols = convert_cols]
# Check it worked
str(dt)
## Classes 'data.table' and 'data.frame':   12497 obs. of  19 variables:
##  $ isoyear        : chr  "ercot2018" "ercot2018" "ercot2018" "ercot2018" ...
##  $ pukey          : num  44093 42741 42699 48052 44660 ...
##  $ fueltype       : chr  "Solar" "Other Fuel" "Other Fuel" "Other Fuel" ...
##  $ tech           : chr  "Solar" "Other" "Other" "Other" ...
##  $ capacity       : num  1 4 2.5 2.5 1.5 22.1 8.1 30 13.5 30 ...
##  $ vcost_om       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ vcost_fuel     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ vcost_emissions: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fcost          : num  28.3 0 0 0 28.3 28.3 28.3 0 28.3 28.3 ...
##  $ heat_rate      : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ heat_input     : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ net_generation : num  NA NA NA NA NA ...
##  $ cap_factor     : num  NA 0 NA NA NA ...
##  $ nox            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ sox            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ co2            : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ operating      : chr  "Operating" "Operating" "Operating" "Operating" ...
##  $ iso            : chr  "ercot" "ercot" "ercot" "ercot" ...
##  $ year           : int  2018 2018 2018 2018 2018 2018 2018 2018 2018 2018 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# I am only going to use operating plants
dt <- dt[operating == 'Operating']

Describe each variable: what values does it take? Do you have any concerns about some variable (extreme values, missing values)?

You might be concerned about capacity factor being over 100%. Because this is calculated off the summer capacity, this is actually okay - see this link from UCS. The big one is heat_rate, which is the energy input over energy output.

# Extreme values:

# Heat rate - the generator shouldn't be able to produce more secondary than primary energy. 
dt[,energy_efficiency := 3412.14/heat_rate] # convert btu input/kwh output  to kwh output /kwh input
# Potentially Problematic generators by dataset
dt[energy_efficiency>1, .N, by =.(fueltype,isoyear)]  # 47 total problematic ones across all datasets
##               fueltype   isoyear  N
##  1:         Other Fuel ercot2018  1
##  2:        Natural Gas ercot2018  1
##  3:            Biomass    ne2009  1
##  4:               Coal  miso2018  4
##  5:        Natural Gas  miso2018 10
##  6:            Biomass  miso2018  4
##  7: Petroleum Products  miso2018  1
##  8:        Natural Gas    ne2018  3
##  9:        Natural Gas    ny2018  1
## 10: Petroleum Products    ny2018  6
## 11:               Coal   pjm2009  2
## 12:        Natural Gas   pjm2009  4
## 13:         Other Fuel   pjm2009  1
## 14: Petroleum Products   pjm2009  2
# You can choose to drop these or not
# dt<-dt[energy_efficiency<1] 
summary(dt[,energy_efficiency])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.034   0.279   0.314   0.355   0.402   1.137    4379
# Capacity factor should be <100
summary(dt[,cap_factor])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   2.958  32.390  34.734  59.880 178.140    3399
dt[cap_factor>100] #56 potentially problematic ones across all datasets
##       isoyear pukey    fueltype                tech capacity   vcost_om
##  1: ercot2009  2419     Uranium             Nuclear  1280.00  10.430000
##  2: ercot2009  9758 Natural Gas      Combined Cycle   185.00  28.190000
##  3: ercot2009  5500 Natural Gas  Combustion Turbine     3.70  34.300000
##  4:    ne2009 15971       Water               Hydro     6.18   3.533599
##  5:    ne2009 15972       Water               Hydro     6.18   3.533599
##  6:    ne2009 22163 Natural Gas  Combustion Turbine     9.50  57.669563
##  7:    ne2009  2589     Biomass       Steam Turbine    13.00 118.490835
##  8:  miso2018 30061  Other Fuel               Other     5.00   0.000000
##  9:  miso2018 12889     Uranium             Nuclear   227.75  10.010000
## 10:  miso2018  8518     Uranium             Nuclear    57.76  11.110000
## 11:  miso2018 43629     Biomass       Steam Turbine    40.00  31.310000
## 12:  miso2018 21157     Biomass Internal Combustion     0.90  50.380000
## 13:  miso2018 21158     Biomass Internal Combustion     0.90  50.380000
## 14:  miso2018 21159     Biomass Internal Combustion     0.90  50.380000
## 15:  miso2018  1748     Biomass Internal Combustion     0.90  70.180000
## 16:  miso2018  1749     Biomass Internal Combustion     0.90  70.180000
## 17:  miso2018  1750     Biomass Internal Combustion     0.90  70.180000
## 18:    ny2018  6785       Water               Hydro    53.21   1.950000
## 19:    ny2018  6786       Water               Hydro    53.21   1.950000
## 20:    ny2018  6787       Water               Hydro    53.21   1.950000
## 21:    ny2018  6788       Water               Hydro    53.21   1.950000
## 22:    ny2018  6789       Water               Hydro    53.21   1.950000
## 23:    ny2018  6790       Water               Hydro    53.21   1.950000
## 24:    ny2018  6791       Water               Hydro    53.21   1.950000
## 25:    ny2018  6792       Water               Hydro    53.21   1.950000
## 26:    ny2018  6793       Water               Hydro    53.21   1.950000
## 27:    ny2018  6794       Water               Hydro    53.21   1.950000
## 28:    ny2018  6795       Water               Hydro    53.21   1.950000
## 29:    ny2018  6796       Water               Hydro    53.21   1.950000
## 30:    ny2018  6797       Water               Hydro    53.21   1.950000
## 31:    ny2018  6798       Water               Hydro    53.21   1.950000
## 32:    ny2018  6799       Water               Hydro    53.21   1.950000
## 33:    ny2018  6800       Water               Hydro    53.21   1.950000
## 34:   pjm2009  4551        Coal               Other     6.00   0.000000
## 35:   pjm2009  6785       Water               Hydro     0.27   1.590000
## 36:   pjm2009  6786       Water               Hydro     0.27   1.590000
## 37:   pjm2009  6787       Water               Hydro     0.27   1.590000
## 38:   pjm2009  6788       Water               Hydro     0.27   1.590000
## 39:   pjm2009  6789       Water               Hydro     0.27   1.590000
## 40:   pjm2009  6790       Water               Hydro     0.27   1.590000
## 41:   pjm2009  6791       Water               Hydro     0.27   1.590000
## 42:   pjm2009  6792       Water               Hydro     0.27   1.590000
## 43:   pjm2009  6793       Water               Hydro     0.27   1.590000
## 44:   pjm2009  6794       Water               Hydro     0.27   1.590000
## 45:   pjm2009  6795       Water               Hydro     0.27   1.590000
## 46:   pjm2009  6796       Water               Hydro     0.27   1.590000
## 47:   pjm2009  6797       Water               Hydro     0.27   1.590000
## 48:   pjm2009  6798       Water               Hydro     0.27   1.590000
## 49:   pjm2009  6799       Water               Hydro     0.27   1.590000
## 50:   pjm2009  6800       Water               Hydro     0.27   1.590000
## 51:   pjm2009 16807       Water               Hydro     1.78   1.870000
## 52:   pjm2009 16808       Water               Hydro     1.78   1.870000
## 53:   pjm2009 16809       Water               Hydro     1.78   1.870000
## 54:   pjm2009  5141     Biomass       Steam Turbine    15.00  16.380000
## 55:   pjm2009  2848 Natural Gas      Combined Cycle   148.50  34.780000
## 56:   pjm2009  2551        Coal       Steam Turbine    42.50  39.030000
##       isoyear pukey    fueltype                tech capacity   vcost_om
##     vcost_fuel vcost_emissions     fcost heat_rate heat_input net_generation
##  1:    8.32000               0  70.31000     10400         NA       11303915
##  2:   27.59000               0   6.89000      7191         NA        2009477
##  3:   32.70000               0  13.08000      8437     289917          34362
##  4:    0.00000               0  11.42826        NA         NA          59639
##  5:    0.00000               0  11.42826        NA         NA          59639
##  6:   56.34835               0   6.76357     11375     399474          35118
##  7:  112.42832               0  94.22785     17346    1995942         115066
##  8:    0.00000               0   0.00000        NA         NA          43986
##  9:    6.50000               0 120.34000     10400         NA        2010638
## 10:    7.79000               0 118.12000     10400         NA         514138
## 11:    7.67000               0 113.09000      4859     462639          95204
## 12:   37.41000               0  11.75000     10956      86845           7927
## 13:   37.41000               0  11.75000     10956      86845           7927
## 14:   37.41000               0  11.75000     10956      86845           7927
## 15:   57.84000               0   8.38000     16844     184994          10983
## 16:   57.84000               0   8.38000     16844     184994          10983
## 17:   57.84000               0   8.38000     16844     184994          10983
## 18:    0.00000               0   8.82000        NA         NA         474894
## 19:    0.00000               0   8.82000        NA         NA         474894
## 20:    0.00000               0   8.82000        NA         NA         474894
## 21:    0.00000               0   8.82000        NA         NA         474894
## 22:    0.00000               0   8.82000        NA         NA         474894
## 23:    0.00000               0   8.82000        NA         NA         474894
## 24:    0.00000               0   8.82000        NA         NA         474894
## 25:    0.00000               0   8.82000        NA         NA         474894
## 26:    0.00000               0   8.82000        NA         NA         474894
## 27:    0.00000               0   8.82000        NA         NA         474894
## 28:    0.00000               0   8.82000        NA         NA         474894
## 29:    0.00000               0   8.82000        NA         NA         474894
## 30:    0.00000               0   8.82000        NA         NA         474894
## 31:    0.00000               0   8.82000        NA         NA         474894
## 32:    0.00000               0   8.82000        NA         NA         474894
## 33:    0.00000               0   8.82000        NA         NA         474894
## 34:    0.00000               0   0.00000        NA         NA          93628
## 35:    0.00000               0   7.74000        NA         NA           2419
## 36:    0.00000               0   7.74000        NA         NA           2419
## 37:    0.00000               0   7.74000        NA         NA           2419
## 38:    0.00000               0   7.74000        NA         NA           2419
## 39:    0.00000               0   7.74000        NA         NA           2419
## 40:    0.00000               0   7.74000        NA         NA           2419
## 41:    0.00000               0   7.74000        NA         NA           2419
## 42:    0.00000               0   7.74000        NA         NA           2419
## 43:    0.00000               0   7.74000        NA         NA           2419
## 44:    0.00000               0   7.74000        NA         NA           2419
## 45:    0.00000               0   7.74000        NA         NA           2419
## 46:    0.00000               0   7.74000        NA         NA           2419
## 47:    0.00000               0   7.74000        NA         NA           2419
## 48:    0.00000               0   7.74000        NA         NA           2419
## 49:    0.00000               0   7.74000        NA         NA           2419
## 50:    0.00000               0   7.74000        NA         NA           2419
## 51:    0.06000               0  33.63000        NA         NA          42252
## 52:    0.06000               0  33.63000        NA         NA          42252
## 53:    0.06000               0  33.63000        NA         NA          42252
## 54:   11.39000               0  50.48000      3874     533834         137797
## 55:   34.23000               0  13.28000      7238   11661630        1603149
## 56:   34.95000               0  38.27000      9308    3466069         372364
##     vcost_fuel vcost_emissions     fcost heat_rate heat_input net_generation
##     cap_factor    nox    sox      co2 operating   iso year energy_efficiency
##  1:   100.8100     NA     NA       NA Operating ercot 2009         0.3280904
##  2:   121.3700 0.0168 0.0006 118.8573 Operating ercot 2009         0.4745015
##  3:   106.0200 0.0590 0.0020 119.3100 Operating ercot 2009         0.4044257
##  4:   110.1635     NA     NA       NA Operating    ne 2009                NA
##  5:   110.1635     NA     NA       NA Operating    ne 2009                NA
##  6:   114.2272 0.0590 0.0020 119.3100 Operating    ne 2009         0.2999684
##  7:   101.0414 0.1690 0.0290 118.1800 Operating    ne 2009         0.1967105
##  8:   100.4200     NA     NA       NA Operating  miso 2018                NA
##  9:   100.7800     NA     NA       NA Operating  miso 2018         0.3280904
## 10:   101.6200     NA     NA       NA Operating  miso 2018         0.3280904
## 11:   107.7900 0.4140 0.0470 206.4500 Operating  miso 2018         0.7022309
## 12:   100.5500     NA     NA       NA Operating  miso 2018         0.3114403
## 13:   100.5500     NA     NA       NA Operating  miso 2018         0.3114403
## 14:   100.5500     NA     NA       NA Operating  miso 2018         0.3114403
## 15:   139.3100     NA     NA       NA Operating  miso 2018         0.2025730
## 16:   139.3100     NA     NA       NA Operating  miso 2018         0.2025730
## 17:   139.3100     NA     NA       NA Operating  miso 2018         0.2025730
## 18:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 19:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 20:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 21:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 22:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 23:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 24:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 25:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 26:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 27:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 28:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 29:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 30:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 31:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 32:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 33:   101.8800     NA     NA       NA Operating    ny 2018                NA
## 34:   178.1400     NA     NA       NA Operating   pjm 2009                NA
## 35:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 36:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 37:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 38:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 39:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 40:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 41:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 42:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 43:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 44:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 45:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 46:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 47:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 48:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 49:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 50:   102.2700     NA     NA       NA Operating   pjm 2009                NA
## 51:   166.3200     NA     NA       NA Operating   pjm 2009                NA
## 52:   166.3200     NA     NA       NA Operating   pjm 2009                NA
## 53:   166.3200     NA     NA       NA Operating   pjm 2009                NA
## 54:   104.8700 0.3080 0.0290 120.4400 Operating   pjm 2009         0.8807796
## 55:   103.6900 0.0104 0.0006 118.8876 Operating   pjm 2009         0.4714203
## 56:   100.0200 0.1497 0.2180 205.1595 Operating   pjm 2009         0.3665814
##     cap_factor    nox    sox      co2 operating   iso year energy_efficiency
dt[cap_factor>100, .N, by =.(isoyear,fueltype)] 
##       isoyear    fueltype  N
##  1: ercot2009     Uranium  1
##  2: ercot2009 Natural Gas  2
##  3:    ne2009       Water  2
##  4:    ne2009 Natural Gas  1
##  5:    ne2009     Biomass  1
##  6:  miso2018  Other Fuel  1
##  7:  miso2018     Uranium  2
##  8:  miso2018     Biomass  7
##  9:    ny2018       Water 16
## 10:   pjm2009        Coal  2
## 11:   pjm2009       Water 19
## 12:   pjm2009     Biomass  1
## 13:   pjm2009 Natural Gas  1
# Missing values - can't say anything here really.
# Anything with fuel type missing is just an empty row that was imported by accident.
dt<-dt[!fueltype == ""]
summary(dt)
##    isoyear              pukey         fueltype             tech          
##  Length:11408       Min.   :    8   Length:11408       Length:11408      
##  Class :character   1st Qu.: 6932   Class :character   Class :character  
##  Mode  :character   Median :13697   Mode  :character   Mode  :character  
##                     Mean   :16619                                        
##                     3rd Qu.:21578                                        
##                     Max.   :70516                                        
##                                                                          
##     capacity          vcost_om          vcost_fuel      vcost_emissions    
##  Min.   :   0.00   Min.   :   0.000   Min.   :   0.00   Min.   :-0.010000  
##  1st Qu.:   1.00   1st Qu.:   7.739   1st Qu.:   0.00   1st Qu.: 0.000000  
##  Median :   3.67   Median :  30.510   Median :  23.09   Median : 0.000000  
##  Mean   :  53.66   Mean   :  60.061   Mean   :  37.95   Mean   : 0.005215  
##  3rd Qu.:  46.00   3rd Qu.:  62.990   3rd Qu.:  48.89   3rd Qu.: 0.000000  
##  Max.   :1401.00   Max.   :1537.780   Max.   :1392.24   Max.   :12.524668  
##                                                                            
##      fcost           heat_rate        heat_input       net_generation    
##  Min.   :   0.00   Min.   :  3000   Min.   :       0   Min.   :       0  
##  1st Qu.:   7.74   1st Qu.:  8495   1st Qu.:    4843   1st Qu.:    2239  
##  Median :  14.29   Median : 10869   Median :   91828   Median :   11072  
##  Mean   :  25.44   Mean   : 11307   Mean   : 2769345   Mean   :  305574  
##  3rd Qu.:  28.30   3rd Qu.: 12229   3rd Qu.: 1384441   3rd Qu.:  150873  
##  Max.   :1990.78   Max.   :100000   Max.   :99618096   Max.   :11714588  
##                    NA's   :4379     NA's   :6161       NA's   :3913      
##    cap_factor           nox             sox             co2        
##  Min.   :  0.000   Min.   :0.005   Min.   :0.000   Min.   : 15.13  
##  1st Qu.:  2.958   1st Qu.:0.028   1st Qu.:0.001   1st Qu.:118.86  
##  Median : 32.390   Median :0.076   Median :0.002   Median :119.31  
##  Mean   : 34.734   Mean   :0.154   Mean   :0.114   Mean   :140.12  
##  3rd Qu.: 59.880   3rd Qu.:0.186   3rd Qu.:0.041   3rd Qu.:161.86  
##  Max.   :178.140   Max.   :1.606   Max.   :5.901   Max.   :534.59  
##  NA's   :3399      NA's   :7141    NA's   :7141    NA's   :7141    
##   operating             iso                 year      energy_efficiency
##  Length:11408       Length:11408       Min.   :2009   Min.   :0.034    
##  Class :character   Class :character   1st Qu.:2009   1st Qu.:0.279    
##  Mode  :character   Mode  :character   Median :2018   Median :0.314    
##                                        Mean   :2015   Mean   :0.355    
##                                        3rd Qu.:2018   3rd Qu.:0.402    
##                                        Max.   :2018   Max.   :1.137    
##                                                       NA's   :4379
dt[is.na(net_generation), .N, by = isoyear]
##      isoyear    N
## 1: ercot2018  466
## 2: ercot2009   50
## 3:    ne2009  138
## 4:  miso2018  589
## 5:    ne2018 1418
## 6:    ny2018  949
## 7:   pjm2009  303
dt[is.na(net_generation), .N, by = fueltype]
##              fueltype    N
## 1:              Solar  746
## 2:         Other Fuel   36
## 3:               Wind  208
## 4:              Water 1074
## 5:        Natural Gas  927
## 6:               Coal   32
## 7:            Biomass  331
## 8: Petroleum Products  559
dt[is.na(net_generation), .N, by = tech]
##                   tech    N
## 1:               Solar  746
## 2:               Other   42
## 3:                Wind  208
## 4:               Hydro 1074
## 5:       Steam Turbine  215
## 6:  Combustion Turbine  409
## 7:      Combined Cycle  220
## 8: Internal Combustion  999

Part 2

Now let’s look at the importance of each fuel in this market. #### a What is the fuel composition of this market according to capacity (i.e. how much capacity for each fuel)? Show it in a pie chart.

# data for the grap
ggplot(dt[,.(capacity = sum(capacity, na.rm=T)), by = c("isoyear", "fueltype")], aes(x = "",y = capacity, fill = fueltype)) + 
    geom_bar(position = 'fill', stat="identity", width=1) +
    coord_polar("y", start=0)+
    ggtitle('Capacity Fuel composition by market') + 
    # geom_text(aes(x = 1.3, y = midpoint, label = labels)) +
    facet_wrap(~isoyear)+
    theme_void()

#### Part b (b) What is the fuel composition of this market in terms of net generation? Show it in a pie chart.

ggplot(dt[,.(net_generation = sum(net_generation, na.rm=T)), by = c("isoyear", "fueltype")], aes(x = "",y = net_generation, fill = fueltype)) + 
    geom_bar(position = 'fill', stat="identity", width=1) +
    coord_polar("y", start=0)+
    ggtitle('Net Generation fuel composition by market') + 
    # geom_text(aes(x = 1.3, y = midpoint, label = labels)) +
    facet_wrap(~isoyear)+
    theme_void()

Why are they different or similar?

Generally speaking: * Capacity differs from net generation because more expensive plants should generate less than their theoretical max capacity, whereas cheap plants shouold have a high capacity factor. * Intermittency is also a reason why they are different - intermittency means that power plants can’t produce at full capacity all the time, so net generation/total theoretical generation should be less than a plant that isn’t intermittent, all other factors equal. * Dispatchability or lack-thereof - if a power plant is dispatchable, it is more likely to be used to turn on and off with wild short-term swings in electric power demand, but this has to be put in the context of the distribution of power plants available in that ISO.

How much does each fuel contribute to NOx, SO2, and CO2 emissions? Choose an appropriate plot type to answer this.

How much does each fuel contribute is a question about total emissions, not average emissions, so you should take the emissions rate, multiply by net generation, and that’s the actual total emissions for that ISO. Then, you could interpret this in two ways - either the contribution is total emissions, in which case a graph for each is fine. Or you could interpret the question as being what is the difference in the composition between NOx, SO2, and CO2 given the generation of your iso - which was harder, more involved, but much more informative.

# Set the groupby key
setkey(dt, isoyear, fueltype) # Students should take out isoyear

# These plot general numbers for each dataset
# Emissions by dataset - fueltype
ggplot(dt[,.(emissions = sum(heat_input*co2, na.rm=T)), by = key(dt)],aes(x = factor(isoyear), y= emissions, fill = factor(fueltype))) + 
  geom_bar(stat = 'identity', position = 'dodge') + 
  scale_y_continuous(name = "CO2 Emissions (lbs)") + 
  scale_x_discrete(name = "ISO and year") + 
  scale_fill_discrete(name = "Fuel Type")

# Emissions by dataset - fueltype
ggplot(dt[,.(emissions = sum(heat_input*nox, na.rm=T)), by = key(dt)],aes(x = factor(isoyear), y= emissions, fill = factor(fueltype))) + 
  geom_bar(stat = 'identity', position = 'dodge') + 
  scale_y_continuous(name = "NO2 Emissions (lbs)") + 
  scale_x_discrete(name = "ISO and year") + 
  scale_fill_discrete(name = "Fuel Type")

# Emissions by dataset - fueltype
ggplot(dt[,.(emissions = sum(heat_input*sox, na.rm=T)), by = key(dt)],aes(x = factor(isoyear), y= emissions, fill = factor(fueltype))) + 
  geom_bar(stat = 'identity', position = 'dodge') + 
  scale_y_continuous(name = "SOx Emissions (lbs)") + 
  scale_x_discrete(name = "ISO and year") + 
  scale_fill_discrete(name = "Fuel Type")

# These produce a CO2 graph for each dataset
for (name in names(alldata)){
  print(
    ggplot(dt[isoyear == name,.(emissions = sum(net_generation*co2, na.rm=T)), by = key(dt)],aes(x = factor(isoyear), y= emissions, fill = factor(fueltype))) + 
  geom_bar(stat = 'identity', position = 'dodge') +
  scale_y_continuous(name = "CO2 Emissions (lbs)") + 
  scale_x_discrete(name = "Fuel type") + 
  scale_fill_discrete(name = "Fuel Type") + 
  ggtitle(paste(name,"- Emissions by fuel type"))
  )
}

# Calculate the total emissions
dt[, tnox := nox*net_generation]
dt[, tsox := sox*net_generation]
dt[, tco2 := co2*net_generation]


# Create a table with emissions in columns, indexed by isoyear and fuel type.
emissions <- dt[,.(co2 = sum(tco2, na.rm = T),nox = sum(tnox, na.rm = T),sox= sum(tsox, na.rm = T)), by = c('isoyear','fueltype'),]
# Put the melted version, and I'm just using facetwrap to mkae one plot for each
ggplot(melt(emissions),aes(x = variable, y = value,fill = fueltype)) +
  geom_bar(position = 'fill',stat= 'identity') +
  facet_wrap(~isoyear)
## Warning in melt.data.table(emissions): id.vars and measure.vars are internally
## guessed when both are 'NULL'. All non-numeric/integer/logical type columns are
## considered id.vars, which in this case are columns [isoyear, fueltype]. Consider
## providing at least one of 'id' or 'measure' vars in future.

# Unnecessary data cleaning for the answer key. 
if (F){ # 
  # Assume that if there is na generation, that no electricity was produced
  dt[is.na(net_generation), net_generation:= 0] 
  
  # Checks which fuels have no emissions factors
  dt[, mean(co2, na.rm=T), by = key(dt)]
  dt[, mean(sox, na.rm=T), by = key(dt)]
  dt[, mean(nox, na.rm=T), by = key(dt)]
  
  # Assume that renewable fuels are zero-emissions if they do not already have a value assigned.
  renewables <- c('Solar','Uranium','Water','Wind')
  dt[is.na(co2) & fueltype %in% renewables, co2 := 0.]
  dt[is.na(nox) & fueltype %in% renewables, nox := 0.]
  dt[is.na(sox) & fueltype %in% renewables, sox := 0.]
  
  # This is a reasonable but potentially bad assumption if the distribution of emisssions factors is skewed/fat-tailed, but I will set the nas for other emissions factors to be the mean or max of the distribution
  dt[is.na(nox), nox := mean(nox, na.rm = T), by = c('fueltype','tech')]
  
  summary(dt)
}

Question 3

Organize the data and plot the generation supply curve using a different color for each fuel (Check here for a reference about supply curves.). The idea is to have a plot in which each plant is a dot, its height is its variable cost and its x- coordinate is the capacity of the system at a cost equal or lower than the plant’s. For this, you have to order generators according to variable cost, and calculate the cumulative capacity of the system. Use geom point such that each plant is a dot, but do not connect the dots. Label the plot properly, add a title and a legend.

# The vcost_fuel is a component o fthe the operating cost, we see here that it never goes above the x = y line. 
ggplot(dt, aes(x = vcost_om, y = vcost_fuel)) + 
  geom_point()

# Order generators according to variable cost
setorder(dt, isoyear, vcost_om)

# calculate the cumulative capacity of the system
dt[, cum_capacity := cumsum(capacity), by = isoyear]

# Draw the supply curve for all ISOs.
ggplot(dt, aes(x = cum_capacity, y = vcost_om, color = isoyear)) + 
  geom_point() + 
  xlab("Cumulative Summer Capacity (MW)")+ 
  ylab("Variable Cost ($/MWh)")

scs <- list()
# Draw the supply curve by iso using a different color for each uel. 
for (name in names(alldata)){
  scs[[name]] <-ggplot(dt[isoyear == name], aes(x = cum_capacity, y = vcost_om, color = fueltype)) + 
    geom_point() + 
    xlab("Cumulative Summer Capacity (MW)")+ 
    ylab("Variable Cost ($/MWh)")+
    ggtitle(paste(name, "Supply Curve"))
}
scs
## $ercot2018

## 
## $ercot2009

## 
## $ne2009

## 
## $miso2018

## 
## $ne2018

## 
## $ny2018

## 
## $pjm2009

ggplot(dt, aes(x = cum_capacity, y = vcost_om, color = fueltype)) + 
    geom_point() + 
  facet_wrap(~isoyear, nrow= 2, ncol = 4)

Question 4

In the supply curve, are fuels ordered by cost? What do you think is the role of cost in explaining the differences between the capacity and net generation shares of each fuel?

Fuels are ordered by variable cost, not total cost. The higher the variable cost is, the lower the capacity factor should be (this is net generation over total theoretical generation, which scales with capacity). ISOs create a merit order for each instant based on cost, subject to the constraint of dispatchability, and so cost should explain the differences up to that constraint.

Question 5

Now you will create three values that we will use to represent load. Let’s assume average load is 60% of capacity, winter peak is 80% of capacity, and summer peak is 90% of capacity.

Compute these three values of load.

load <- dt[, .(total_capacity = max(cum_capacity)), by = isoyear]
load[,`:=`(avg_load = .6*total_capacity, winter_peak = .8*total_capacity,summer_peak = .9*total_capacity)]

load[,avg_load := .6*total_capacity]
load[,winter_peak := .8*total_capacity]
load[,summer_peak := .9*total_capacity]

Part b

Add the load values to the supply curve plot as vertical lines. Save this plot as a pdf file using ggsave.

# Draw the supply curve by iso using a different color for each fuel. 
for (name in names(scs)){
  scs[[name]] <- scs[[name]] + 
    geom_vline(xintercept = load[isoyear ==name, avg_load])+
    geom_vline(xintercept = load[isoyear ==name, winter_peak])+
    geom_vline(xintercept = load[isoyear ==name, summer_peak])
  ggsave(paste("supplycurve", name, ".pdf", sep = ""), device = "pdf", width = 16, height = 9, units = "in")
}
scs
## $ercot2018

## 
## $ercot2009

## 
## $ne2009

## 
## $miso2018

## 
## $ne2018

## 
## $ny2018

## 
## $pjm2009

Part c

For each of these three load levels, find the price that would have cleared the market if price were equal to cost, i.e. find the point in the supply curve intersects the load curve (vertical line) in the plot.

# Set up the merge
setkey(load, isoyear)
setkey(dt, isoyear)
# join the loads to the data table
dt <- merge(dt, load)

# Find the prices using merge iteratively (using reduce)
# Each of the lines subsets the cumulative capacity above average load and then finds the minimum price, doing so for each ISO-YEAR. 
dt_price <- Reduce(merge, list(
dt[cum_capacity > avg_load, .(p_avg_load = min(vcost_om)),isoyear], 
dt[cum_capacity > winter_peak, .(p_winter_peak = min(vcost_om)),isoyear],
dt[cum_capacity > summer_peak, .(p_summer_peak = min(vcost_om)),isoyear],
load
))

# This is a less clean method of getting the price using a for loop, shown only for average load:
breakeven <- list()
for (name in names(alldata)){
  # Get average load
  avg_load <- load[isoyear == name, avg_load]
  # Get the 1st one with higher than average load
  breakeven[[name]] <- dt[isoyear == name][dt[isoyear == name, (cum_capacity > avg_load)]][1]
}
breakeven<- rbindlist(breakeven)
breakeven[, price := vcost_om]

dt_price
##      isoyear p_avg_load p_winter_peak p_summer_peak total_capacity  avg_load
## 1: ercot2009   29.99000       44.7300       53.9800       69740.91  41844.55
## 2: ercot2018   24.56000       30.0700       47.8200      100709.41  60425.65
## 3:  miso2018   28.34000       37.8200       61.5100      178797.81 107278.69
## 4:    ne2009   41.64609      100.9178      126.7989       28102.42  16861.45
## 5:    ne2018   44.67000      104.3200      138.4800       33794.44  20276.66
## 6:    ny2018   44.79000       65.1000      124.7700       41503.46  24902.08
## 7:   pjm2009   35.83000       56.7300       91.9500      159552.56  95731.54
##    winter_peak summer_peak
## 1:    55792.73    62766.82
## 2:    80567.53    90638.47
## 3:   143038.25   160918.03
## 4:    22481.94    25292.18
## 5:    27035.55    30415.00
## 6:    33202.77    37353.11
## 7:   127642.05   143597.30

Question 6

Suppose we want to know if the dispatch of power plants is efficient, i.e. if cheaper plants are dispatched first. Do cheaper power plants produce more? To check this, do the following:

Part a

Run an OLS regression of net generation on cost. What cost is the most relevant here? Try total cost and variable cost and argue why/how results vary with the cost definition. Briefly discuss.

Running ols on variable and fixed cost The coefficient on variable cost is negative, so generation decreases with higher cost. This makes sense if you consider the effect of cost on the merit order. The coefficient on fixed cost is positive, which means that more expensive plants (per kW) generate more. Several reasons - renewables have high fixed costs and they come first in the merit order, economies of scale means that cheaper fixed-cost plants are bigger and so generate more. But an omitted variable is capacity - it is correlated with both variable and fixed costs and has a direct effect on net generation.

Running OLS on total cost per year doesn’t quite make sense when you have more disaggregated data and you’re trying to figure out the mechanism, but the coefficient is positive in general. This is most probably the impact of capacity being an ommited variable.

I run everything using a fixed cost for the dataset that you use - your results will differ.

# Calculate the total costs first,
# fcost is in units of $/kW-year, need to compare to $/year
# vcost is in units of $/MWh, multiply by generation (mwh/year) to get to $/year. 
dt[, tcost_om := vcost_om * net_generation + fcost *capacity * 1000]
dt[, tcost_fuel := vcost_fuel * net_generation+ fcost *capacity *1000 ]
dt[, tcost_emissions := vcost_emissions * net_generation+ fcost *capacity*1000]

# Naive regression, noting that vcost_fuel is collinear with vcost_om
model1 <- lm("net_generation ~ vcost_om  + fcost + isoyear", dt)
summary(model1)
## 
## Call:
## lm(formula = "net_generation ~ vcost_om  + fcost + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5603673  -315503  -193888   -11308 11266304 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       521363.0    42528.6  12.259  < 2e-16 ***
## vcost_om           -1280.0      126.6 -10.107  < 2e-16 ***
## fcost               2811.0      284.7   9.874  < 2e-16 ***
## isoyearercot2018  246631.4    62450.7   3.949 7.91e-05 ***
## isoyearmiso2018  -295682.6    45975.1  -6.431 1.34e-10 ***
## isoyearne2009    -444507.5    53450.3  -8.316  < 2e-16 ***
## isoyearne2018    -249766.2    70405.7  -3.548 0.000391 ***
## isoyearny2018     -36690.7    73108.7  -0.502 0.615777    
## isoyearpjm2009   -142990.5    47452.5  -3.013 0.002593 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 980600 on 7486 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.05185,    Adjusted R-squared:  0.05083 
## F-statistic: 51.17 on 8 and 7486 DF,  p-value: < 2.2e-16
# Answers depend on ISO.  

# Same using total variable costs as in the question rather than 
model2 <- lm("net_generation ~ tcost_om  + fcost + isoyear", dt)
summary(model2)
## 
## Call:
## lm(formula = "net_generation ~ tcost_om  + fcost + isoyear", 
##     data = dt)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -12130194    -32244      1048     31283   4400263 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.311e+04  1.660e+04   4.404 1.08e-05 ***
## tcost_om          3.304e-02  1.612e-04 204.991  < 2e-16 ***
## fcost            -5.354e+02  1.119e+02  -4.786 1.73e-06 ***
## isoyearercot2018  1.084e+05  2.446e+04   4.433 9.41e-06 ***
## isoyearmiso2018  -4.950e+04  1.803e+04  -2.746  0.00605 ** 
## isoyearne2009    -9.268e+04  2.099e+04  -4.416 1.02e-05 ***
## isoyearne2018    -1.597e+05  2.752e+04  -5.802 6.81e-09 ***
## isoyearny2018    -6.992e+04  2.860e+04  -2.445  0.01450 *  
## isoyearpjm2009   -7.656e+04  1.842e+04  -4.157 3.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 383900 on 7486 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.8547, Adjusted R-squared:  0.8545 
## F-statistic:  5503 on 8 and 7486 DF,  p-value: < 2.2e-16
  1. Now control for capacity, how do results change?

Variable cost retains a negative coefficient, but its significance and magnitude goes down. We can interpret the coefficient as follows - if a plant costs 1$/MWh more, how much more MWh (it is negative, so less) is it likely to have generated? When we control for capacity, the decrease in coefficient shows that the impact of cost is not as dramatic. The coefficients for total and fixed costs also decrease but stay on the same order of magnitude.

# Naive regression, noting that vcost_fuel is collinear with vcost_om
model4 <- lm("net_generation ~ vcost_om + fcost + capacity + isoyear", dt)
summary(model4)
## 
## Call:
## lm(formula = "net_generation ~ vcost_om + fcost + capacity + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -5510586   -71061    75114   128040  4107435 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -235309.79   20376.10 -11.548  < 2e-16 ***
## vcost_om            -138.29      59.51  -2.324   0.0202 *  
## fcost               2061.40     132.94  15.507  < 2e-16 ***
## capacity            5693.98      34.72 163.980  < 2e-16 ***
## isoyearercot2018   14364.12   29178.08   0.492   0.6225    
## isoyearmiso2018    85749.67   21580.81   3.973 7.15e-05 ***
## isoyearne2009     105815.03   25168.28   4.204 2.65e-05 ***
## isoyearne2018      27631.48   32899.54   0.840   0.4010    
## isoyearny2018      -2998.83   34118.05  -0.088   0.9300    
## isoyearpjm2009    103679.24   22195.56   4.671 3.05e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 457600 on 7485 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.7935, Adjusted R-squared:  0.7933 
## F-statistic:  3197 on 9 and 7485 DF,  p-value: < 2.2e-16
# Same using total variable costs as in the question rather than 
model5 <- lm("net_generation ~ tcost_om  + fcost + capacity + isoyear", dt)
summary(model5)
## 
## Call:
## lm(formula = "net_generation ~ tcost_om  + fcost + capacity + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -8117911   -29329    47006    69181  3883823 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -7.575e+04  1.531e+04  -4.947 7.69e-07 ***
## tcost_om          2.219e-02  2.935e-04  75.630  < 2e-16 ***
## fcost             2.593e+02  1.021e+02   2.539  0.01114 *  
## capacity          2.237e+03  5.268e+01  42.464  < 2e-16 ***
## isoyearercot2018  6.252e+04  2.198e+04   2.844  0.00447 ** 
## isoyearmiso2018   2.004e+04  1.627e+04   1.232  0.21798    
## isoyearne2009     8.531e+03  1.899e+04   0.449  0.65332    
## isoyearne2018    -7.930e+04  2.478e+04  -3.200  0.00138 ** 
## isoyearny2018    -4.506e+04  2.568e+04  -1.755  0.07931 .  
## isoyearpjm2009   -3.413e+00  1.663e+04   0.000  0.99984    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 344700 on 7485 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.8829, Adjusted R-squared:  0.8827 
## F-statistic:  6270 on 9 and 7485 DF,  p-value: < 2.2e-16
  1. What else could you be missing that may lead to bias? Can you control for it? Add some control that you consider relevant and discuss how it changes the results.

We are looking at the effect of cost on generation, one thing we could be missing is the impact of intermittency and dispatchability. You could also control for fuel type if you interpret the coefficient as within-fuel impact of price on net_generation. I’ll show one then the other.

Interestingly, fixed cost LOSES its significance once you add dispatchability and intermittency or fueltype. Dispatchability and intermittency both have extremely negative coefficients, as you might expect since they consist of non-baseload plants and also the plants that can’t physically produce all the time.

# Print these to see the combinations of technologies
#unique(dt[, tech])
unique(dt[,.(tech, fueltype)])
##                    tech           fueltype
##  1:               Solar              Solar
##  2:                Wind               Wind
##  3:               Hydro              Water
##  4:       Steam Turbine         Other Fuel
##  5:             Nuclear            Uranium
##  6:       Steam Turbine               Coal
##  7:  Combustion Turbine        Natural Gas
##  8:      Combined Cycle        Natural Gas
##  9:  Combustion Turbine         Other Fuel
## 10:       Steam Turbine        Natural Gas
## 11: Internal Combustion        Natural Gas
## 12: Internal Combustion            Biomass
## 13:       Steam Turbine            Biomass
## 14:  Combustion Turbine            Biomass
## 15: Internal Combustion Petroleum Products
## 16:               Other         Other Fuel
## 17:      Pumped Storage              Water
## 18: Internal Combustion         Other Fuel
## 19:      Combined Cycle         Other Fuel
## 20:  Combustion Turbine Petroleum Products
## 21:      Combined Cycle            Biomass
## 22:       Steam Turbine Petroleum Products
## 23:               Other        Natural Gas
## 24:      Combined Cycle Petroleum Products
## 25:               Other            Biomass
## 26:               Other               Coal
##                    tech           fueltype
dispatchable_techs = c('Hydro','Steam Turbine','Combustion Turbine', 'Combined Cycle', 'Internal Combustion','Pumped Storage')
intermittent_techs = c('Solar','Wind')

dt[,dispatchable := 0]
dt[,intermittent := 0]
dt[tech %in% dispatchable_techs, dispatchable := 1]
dt[tech %in% intermittent_techs, intermittent := 1]

# Intermittency and dispatchability dummies
summary(lm("net_generation ~ vcost_om + fcost + intermittent+ dispatchable + isoyear", dt))
## 
## Call:
## lm(formula = "net_generation ~ vcost_om + fcost + intermittent+ dispatchable + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -6913844  -236431  -157693     8964 10021608 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       7.151e+06  9.039e+04  79.113  < 2e-16 ***
## vcost_om         -1.086e+03  9.545e+01 -11.382  < 2e-16 ***
## fcost            -2.341e+02  2.145e+02  -1.091   0.2752    
## intermittent     -6.803e+06  8.950e+04 -76.003  < 2e-16 ***
## dispatchable     -6.622e+06  8.499e+04 -77.916  < 2e-16 ***
## isoyearercot2018  2.544e+05  4.632e+04   5.492 4.09e-08 ***
## isoyearmiso2018  -2.516e+05  3.408e+04  -7.383 1.71e-13 ***
## isoyearne2009    -3.742e+05  3.973e+04  -9.417  < 2e-16 ***
## isoyearne2018    -2.730e+05  5.218e+04  -5.232 1.73e-07 ***
## isoyearny2018    -1.388e+05  5.422e+04  -2.559   0.0105 *  
## isoyearpjm2009   -2.074e+05  3.521e+04  -5.889 4.05e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 726700 on 7484 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.4794, Adjusted R-squared:  0.4787 
## F-statistic: 689.3 on 10 and 7484 DF,  p-value: < 2.2e-16
# Fueltype fixed effect
summary(lm("net_generation ~ vcost_om + fcost + fueltype + isoyear", dt))
## 
## Call:
## lm(formula = "net_generation ~ vcost_om + fcost + fueltype + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7441296  -136640   -40884    51434  8699805 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 309966.9    31653.4   9.793  < 2e-16 ***
## vcost_om                      -828.3      107.2  -7.728 1.23e-14 ***
## fcost                         -359.3      181.6  -1.978 0.047926 *  
## fueltypeCoal               1525598.6    31215.6  48.873  < 2e-16 ***
## fueltypeNatural Gas         169104.6    21032.3   8.040 1.03e-15 ***
## fueltypeOther Fuel           38168.5    63275.6   0.603 0.546387    
## fueltypePetroleum Products  102264.0    33773.3   3.028 0.002471 ** 
## fueltypeSolar              -109595.9    63840.3  -1.717 0.086072 .  
## fueltypeUranium            7624462.7    73958.9 103.091  < 2e-16 ***
## fueltypeWater               -34797.1    23714.4  -1.467 0.142326    
## fueltypeWind                 73841.1    31611.9   2.336 0.019525 *  
## isoyearercot2018            211972.9    37141.4   5.707 1.19e-08 ***
## isoyearmiso2018            -274437.1    27932.8  -9.825  < 2e-16 ***
## isoyearne2009              -200348.7    33487.6  -5.983 2.29e-09 ***
## isoyearne2018              -139676.3    42249.9  -3.306 0.000951 ***
## isoyearny2018               -54343.9    43776.2  -1.241 0.214497    
## isoyearpjm2009             -189418.9    28995.1  -6.533 6.88e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 581900 on 7478 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.6665, Adjusted R-squared:  0.6658 
## F-statistic:   934 on 16 and 7478 DF,  p-value: < 2.2e-16
# Technology fixed effect
summary(lm("net_generation ~ vcost_om + fcost + tech + isoyear", dt))
## 
## Call:
## lm(formula = "net_generation ~ vcost_om + fcost + tech + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7518482  -117861   -30158    55944  9504868 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              619154.38   30118.78  20.557  < 2e-16 ***
## vcost_om                   -602.90      95.59  -6.307 3.01e-10 ***
## fcost                     -1559.08     201.29  -7.746 1.08e-14 ***
## techCombustion Turbine  -411056.68   28949.67 -14.199  < 2e-16 ***
## techHydro               -372620.64   27430.85 -13.584  < 2e-16 ***
## techInternal Combustion -443320.09   27277.22 -16.252  < 2e-16 ***
## techNuclear             7348121.03   81016.88  90.699  < 2e-16 ***
## techOther               -429872.66  224299.67  -1.917   0.0553 .  
## techPumped Storage      -313733.04   79847.44  -3.929 8.60e-05 ***
## techSolar               -478256.00   69307.84  -6.900 5.61e-12 ***
## techSteam Turbine        352157.92   27694.51  12.716  < 2e-16 ***
## techWind                -296896.00   33994.83  -8.734  < 2e-16 ***
## isoyearercot2018         242064.23   40334.19   6.001 2.05e-09 ***
## isoyearmiso2018         -141436.40   30365.38  -4.658 3.25e-06 ***
## isoyearne2009           -175000.32   36162.53  -4.839 1.33e-06 ***
## isoyearne2018           -202593.25   45798.61  -4.424 9.85e-06 ***
## isoyearny2018            -66698.44   47702.76  -1.398   0.1621    
## isoyearpjm2009           -53806.90   31546.29  -1.706   0.0881 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 631300 on 7477 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.6075, Adjusted R-squared:  0.6066 
## F-statistic: 680.9 on 17 and 7477 DF,  p-value: < 2.2e-16
# Both fueltype and technology
summary(lm("net_generation ~ vcost_om + fcost + fueltype +  tech + isoyear", dt))
## 
## Call:
## lm(formula = "net_generation ~ vcost_om + fcost + fueltype +  tech + isoyear", 
##     data = dt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -7476486   -91406   -19620    59691  8702253 
## 
## Coefficients: (4 not defined because of singularities)
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 635410.2    40147.2  15.827  < 2e-16 ***
## vcost_om                      -360.8      109.8  -3.286  0.00102 ** 
## fcost                         -751.1      185.3  -4.054 5.09e-05 ***
## fueltypeCoal               1394652.8    39165.9  35.609  < 2e-16 ***
## fueltypeNatural Gas           -553.8    28272.7  -0.020  0.98437    
## fueltypeOther Fuel          -48495.0    66123.0  -0.733  0.46333    
## fueltypePetroleum Products   37707.3    33717.7   1.118  0.26346    
## fueltypeSolar              -463724.0    68150.5  -6.804 1.09e-11 ***
## fueltypeUranium            7288940.7    77282.4  94.316  < 2e-16 ***
## fueltypeWater              -325748.4    76893.6  -4.236 2.30e-05 ***
## fueltypeWind               -281100.1    41053.3  -6.847 8.13e-12 ***
## techCombustion Turbine     -419056.9    26365.8 -15.894  < 2e-16 ***
## techHydro                   -67152.6    71788.3  -0.935  0.34960    
## techInternal Combustion    -447300.4    31820.5 -14.057  < 2e-16 ***
## techNuclear                       NA         NA      NA       NA    
## techOther                  -550918.5   207047.6  -2.661  0.00781 ** 
## techPumped Storage                NA         NA      NA       NA    
## techSolar                         NA         NA      NA       NA    
## techSteam Turbine          -250265.9    31401.0  -7.970 1.82e-15 ***
## techWind                          NA         NA      NA       NA    
## isoyearercot2018            197177.2    36502.5   5.402 6.80e-08 ***
## isoyearmiso2018            -217521.5    27600.9  -7.881 3.71e-15 ***
## isoyearne2009              -172023.9    32979.4  -5.216 1.88e-07 ***
## isoyearne2018              -162166.2    41663.0  -3.892  0.00010 ***
## isoyearny2018               -33642.9    43268.9  -0.778  0.43687    
## isoyearpjm2009             -120042.8    28733.7  -4.178 2.98e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 570800 on 7473 degrees of freedom
##   (3913 observations deleted due to missingness)
## Multiple R-squared:  0.6794, Adjusted R-squared:  0.6785 
## F-statistic:   754 on 21 and 7473 DF,  p-value: < 2.2e-16
  1. (Extra credit) Now you will calculate the profits that each generator would have made if the price had been the price you find assuming average load (the price on an average hour).
  1. First, calculate profits, which are given by (P − mc)Q. Use variable cost as marginal cost, quantity is net generation. Describe profits by fuel type. For this, create a table that includes minimum, percentile 25, mean, median, percentile 75, and max value for each fuel type (fuel types are rows).
# Merge in the prices. 
dt <- merge(dt, dt_price[,.(isoyear, p_avg_load, p_winter_peak, p_summer_peak)])

dt[, profits := (p_avg_load - vcost_om)*net_generation]

# Ge tthe profits grouping over isoyear and fueltype, (not filtering for na net_generation causes bugs)
#https://stackoverflow.com/questions/25928708/using-summary-function-inside-data-table
dt[!is.na(net_generation), as.list(summary(profits, na,rm)),.(isoyear, fueltype)]
##       isoyear           fueltype          Min.       1st Qu.        Median
##  1: ercot2009               Wind  5.467949e+05   5089587.500   8508462.220
##  2: ercot2009              Water -5.853080e+03     20270.460     94938.690
##  3: ercot2009         Other Fuel -8.049397e+05    -18103.395      1919.115
##  4: ercot2009            Uranium  1.857368e+08 193900455.272 202325303.890
##  5: ercot2009               Coal  2.184632e+05  14700002.470  46630789.920
##  6: ercot2009        Natural Gas -1.905469e+07  -1157188.080   -218314.900
##  7: ercot2009            Biomass -5.334610e+05   -327051.780   -321466.950
##  8: ercot2009 Petroleum Products -3.330300e+04    -12020.680     -6060.430
##  9: ercot2018              Solar  4.364312e+04    505395.680   3795809.400
## 10: ercot2018               Wind  9.435957e+05   8945067.000  14037686.200
## 11: ercot2018         Other Fuel -5.043443e+06   -183629.700    -71039.600
## 12: ercot2018            Uranium  1.359154e+08 139625356.310 141917155.520
## 13: ercot2018        Natural Gas -1.294768e+07   -835737.555    -90831.630
## 14: ercot2018               Coal -2.491844e+07   -411636.060   4201653.000
## 15: ercot2018              Water  1.191024e+05    119102.400    119102.400
## 16: ercot2018            Biomass -1.378693e+07   -460934.640   -407511.700
## 17: ercot2018 Petroleum Products -4.842304e+04    -11476.980     -9076.320
## 18:  miso2018         Other Fuel -9.957050e+05   -240775.840    503318.910
## 19:  miso2018              Solar  5.668000e+01     15558.660     45570.720
## 20:  miso2018               Wind  6.453140e+03    114107.070   1079130.010
## 21:  miso2018              Water -1.446215e+06     45269.100     75641.500
## 22:  miso2018        Natural Gas -2.751005e+07   -393262.320    -70537.200
## 23:  miso2018               Coal -1.731176e+08  -1125873.370    206500.700
## 24:  miso2018            Uranium  8.858598e+06  54430957.002  96378721.920
## 25:  miso2018            Biomass -2.143010e+07   -206341.740   -126752.400
## 26:  miso2018 Petroleum Products -1.590694e+06    -26823.720    -13194.380
## 27:    ne2009              Solar  1.749136e+03      1749.136      1749.136
## 28:    ne2009              Water  1.708278e+03    107712.333    252741.318
## 29:    ne2009               Wind  6.593137e+02     28697.670    391835.204
## 30:    ne2009            Uranium  1.924092e+08 230971319.084 269533452.282
## 31:    ne2009            Biomass -3.328731e+07 -11558778.659   -627787.748
## 32:    ne2009               Coal -9.900114e+06  -8262436.995  -7596081.017
## 33:    ne2009        Natural Gas -1.463065e+07   -315726.402     -5354.163
## 34:    ne2009 Petroleum Products -2.249807e+07   -146022.238    -79097.556
## 35:    ne2018              Solar  1.027410e+03     27371.543    116097.330
## 36:    ne2018               Wind  3.299968e+04    103243.560    129665.530
## 37:    ne2018              Water -2.699884e+04     20046.400     89595.540
## 38:    ne2018            Uranium  2.028057e+08 270238089.935 337670517.760
## 39:    ne2018            Biomass -3.476886e+07 -15483635.160  -8047522.780
## 40:    ne2018         Other Fuel  3.051390e+06   3051390.280   3051390.280
## 41:    ne2018        Natural Gas -1.059277e+07   -295598.090    300097.260
## 42:    ne2018 Petroleum Products -2.189818e+07   -832986.925   -109156.570
## 43:    ne2018               Coal -3.607906e+06  -2359889.280   -875334.600
## 44:    ny2018              Solar  7.144005e+04    599570.137   1127700.225
## 45:    ny2018              Water -4.858794e+05    227919.780    651037.540
## 46:    ny2018               Wind  4.858925e+04   2440811.353   4739416.585
## 47:    ny2018            Uranium  1.510469e+08 186413138.550 239641158.665
## 48:    ny2018        Natural Gas -1.221889e+07   -609271.910     34170.960
## 49:    ny2018            Biomass -4.750612e+07  -8945889.400    -85959.720
## 50:    ny2018               Coal  2.602164e+04    138830.945    251640.250
## 51:    ny2018 Petroleum Products -2.738094e+06   -755588.648   -660405.760
## 52:   pjm2009               Coal -2.420815e+07     -8191.740   5181760.195
## 53:   pjm2009         Other Fuel -8.649859e+05    -59064.520     -4228.620
## 54:   pjm2009              Solar  5.732800e+02     14439.490     14439.490
## 55:   pjm2009              Water -4.394479e+06     45989.760    207239.960
## 56:   pjm2009               Wind  4.839516e+04    737451.708   2453929.880
## 57:   pjm2009            Uranium  5.629456e+06 177498693.440 217915858.020
## 58:   pjm2009            Biomass -6.314785e+07    -93674.840     -9382.500
## 59:   pjm2009        Natural Gas -1.536871e+07   -583203.920   -137888.100
## 60:   pjm2009 Petroleum Products -9.399215e+06    -39887.560    -15676.320
##       isoyear           fueltype          Min.       1st Qu.        Median
##              Mean       3rd Qu.          Max.
##  1:   8204286.707  10440963.540  16488473.780
##  2:    306335.170    617925.240   1277543.280
##  3:   1610471.486   2648624.190   6550297.500
##  4: 202872993.457 211297842.075 221104577.400
##  5:  44572157.657  59766518.900 108680145.120
##  6:   -459651.298    388571.760  16380052.000
##  7:   -283419.939   -160578.600     -6531.320
##  8:    -11117.526     -6060.430     -5397.040
##  9:   3843801.394   6355342.080   8662336.560
## 10:  13865420.718  16978345.800  26101707.520
## 11:   -624636.751   1513421.910   1716197.060
## 12: 141728292.177 144020091.387 147163489.190
## 13:    668228.714   1593209.687  11035420.200
## 14:   9153806.372  15294656.700  65027516.340
## 15:    119102.400    119102.400    119102.400
## 16:  -2200071.304   -106753.845    -70683.340
## 17:    -13278.696     -9076.320     -5351.600
## 18:   1674060.033   2435462.065  11906686.440
## 19:    108384.126    106104.960    505273.860
## 20:   3535629.219   5709473.400  36024179.610
## 21:    308224.640    294970.000   3487898.400
## 22:    649001.487    511645.770  14059458.660
## 23:   2323944.014   4053617.560  61294437.960
## 24:  94172580.288 122200052.650 202310934.760
## 25:   -518761.508    -30862.862   5947548.430
## 26:    -42339.391    -10115.850      -157.580
## 27:      1749.136      1749.136      1749.136
## 28:    638041.478    711384.329   6423670.665
## 29:   1348881.136   1702470.769   5548662.675
## 30: 261076496.551 295410151.883 321286851.484
## 31:  -6098636.698   -146410.840  12583793.915
## 32:  -2317660.704  -5527187.719  19697515.993
## 33:    387584.118   1245788.968   7315470.687
## 34:   -596167.641    -19509.632     86593.517
## 35:    100323.236    153106.425    203918.550
## 36:   2057765.936    483259.962  21310934.920
## 37:    307322.487    293248.040   2456100.000
## 38: 298196979.613 345892638.365 354114758.970
## 39:  -8530433.114     66443.010  19105477.880
## 40:   3051390.280   3051390.280   3051390.280
## 41:   1939662.611   2948845.980  21387109.640
## 42:  -1550849.063    -35102.685    225035.880
## 43:   -746971.532   -448867.800   3557140.260
## 44:   1127700.225   1655830.312   2183960.400
## 45:  13444542.959  20344458.960  54163725.840
## 46:   5602115.654   9104601.230  13443471.360
## 47: 237051125.338 272974752.033 339221157.300
## 48:   3218468.216   8786654.850  21616391.160
## 49:  -6104233.449    -85959.720   2831254.800
## 50:   1193481.173   1777210.940   3302781.630
## 51:   -727979.213   -383261.402    -27366.290
## 52:  14543465.458  21336689.182 136409469.200
## 53:    424821.197    335723.300   3239746.560
## 54:     27038.000     24794.360    127662.290
## 55:   1094309.154   1712335.590   7264358.400
## 56:   4256188.393   6029635.500  19819375.540
## 57: 199786124.129 243439303.050 264297621.900
## 58:   -696980.924     -3828.720   5598616.100
## 59:   -288205.597    -14198.880  52582176.240
## 60:    -85615.570     -4747.080      1518.100
##              Mean       3rd Qu.          Max.

Part b)

Now compute total profits considering fixed costs (P −mc)Q−F, and create the same table as above. Do firms cover their costs?

Many firms do not cover their costs. You should reference a percentile to say something like the median xyz firms cover their costs, the median xyz firms don’t.

Interestingly, across ISOs the median Uranium plants consistently cover their costs, and most renewable plants too. It is predominantly natural gas, biomass, and petroleum product fueled plants where the median plant in all ISOs don’t cover their costs.

dt[,total_profits := profits - fcost*capacity*1000]
profits <- dt[!is.na(net_generation), as.list(summary(total_profits, na,rm)),.(isoyear, fueltype)]
setorder(profits, Median)
profits
##       isoyear           fueltype          Min.       1st Qu.        Median
##  1:    ne2009               Coal -2.989130e+07 -1.323333e+07 -1.260883e+07
##  2:    ne2018            Biomass -3.640852e+07 -1.654206e+07 -9.714643e+06
##  3:    ne2018               Coal -8.929498e+06 -6.300074e+06 -4.491182e+06
##  4: ercot2018               Coal -3.528385e+07 -1.885897e+07 -4.043248e+06
##  5:    ny2018               Coal -1.550460e+07 -9.553703e+06 -3.602810e+06
##  6:  miso2018               Coal -1.916431e+08 -5.835260e+06 -1.164002e+06
##  7:    ny2018 Petroleum Products -7.269669e+06 -1.159090e+06 -1.075778e+06
##  8:   pjm2009         Other Fuel -2.537017e+06 -1.344231e+06 -1.025695e+06
##  9: ercot2018        Natural Gas -2.118352e+07 -1.900574e+06 -9.187952e+05
## 10:    ne2009            Biomass -3.474261e+07 -1.339746e+07 -9.110718e+05
## 11: ercot2009        Natural Gas -2.307409e+07 -2.096991e+06 -6.845273e+05
## 12: ercot2018         Other Fuel -5.323443e+06 -1.093436e+06 -6.548396e+05
## 13: ercot2018            Biomass -1.449993e+07 -1.103653e+06 -4.603477e+05
## 14:   pjm2009        Natural Gas -2.271271e+07 -1.159004e+06 -3.872645e+05
## 15: ercot2009            Biomass -2.556809e+06 -4.379044e+05 -3.391478e+05
## 16:    ne2018 Petroleum Products -2.189818e+07 -1.485535e+06 -2.804003e+05
## 17:    ne2009        Natural Gas -1.648132e+07 -9.017578e+05 -2.624734e+05
## 18: ercot2009         Other Fuel -2.220190e+06 -8.328901e+05 -2.600367e+05
## 19:  miso2018        Natural Gas -3.613767e+07 -8.607858e+05 -2.563125e+05
## 20:    ne2018        Natural Gas -1.603148e+07 -8.394610e+05 -2.476524e+05
## 21:  miso2018            Biomass -2.210440e+07 -3.039604e+05 -1.481845e+05
## 22:    ne2009 Petroleum Products -2.743372e+07 -3.019095e+05 -1.379362e+05
## 23:    ny2018            Biomass -4.915967e+07 -1.024447e+07 -9.925572e+04
## 24: ercot2009              Water -2.607091e+05 -1.365027e+05 -5.040704e+04
## 25:    ny2018        Natural Gas -2.296081e+07 -9.272954e+05 -5.039584e+04
## 26: ercot2018 Petroleum Products -6.324904e+04 -3.724632e+04 -3.724632e+04
## 27:   pjm2009 Petroleum Products -1.986088e+07 -1.142091e+05 -3.098706e+04
## 28:  miso2018 Petroleum Products -1.708861e+06 -5.994786e+04 -2.901240e+04
## 29: ercot2009 Petroleum Products -4.239100e+04 -2.322068e+04 -1.341043e+04
## 30:   pjm2009            Biomass -6.508545e+07 -9.897384e+04 -1.168650e+04
## 31: ercot2018              Water -5.313600e+03 -5.313600e+03 -5.313600e+03
## 32:    ne2009              Solar -7.198642e+02 -7.198642e+02 -7.198642e+02
## 33:   pjm2009              Solar -2.216367e+05  4.732830e+03  9.501490e+03
## 34:  miso2018              Solar -3.970095e+05 -2.846052e+04  1.723194e+04
## 35:    ne2009               Wind -2.012454e+06 -3.751103e+04  2.247282e+04
## 36:  miso2018         Other Fuel -2.490935e+06 -8.979145e+05  2.304534e+04
## 37:  miso2018              Water -2.304155e+06  2.708640e+03  4.869376e+04
## 38:    ne2018              Solar -1.739782e+04  2.278035e+04  5.611529e+04
## 39:    ne2018              Water -1.143420e+05 -1.053025e+04  6.051054e+04
## 40:    ne2018               Wind  3.454680e+03  4.859856e+04  6.178953e+04
## 41:   pjm2009              Water -5.279579e+06  5.349040e+03  1.127400e+05
## 42:    ne2009              Water -1.572815e+05  5.080412e+04  1.773444e+05
## 43:  miso2018               Wind -4.596052e+06  3.322496e+04  5.086349e+05
## 44:    ny2018              Water -7.339944e+05 -1.153294e+04  6.005636e+05
## 45:    ny2018              Solar  3.748005e+04  3.512376e+05  6.649952e+05
## 46: ercot2018              Solar -4.820711e+06  6.982226e+04  1.250951e+06
## 47:   pjm2009               Coal -3.541771e+07 -3.549688e+06  1.378734e+06
## 48:    ne2018         Other Fuel  1.522170e+06  1.522170e+06  1.522170e+06
## 49:   pjm2009               Wind -1.126724e+07  1.309536e+04  1.814897e+06
## 50:    ny2018               Wind -3.251775e+04  7.841748e+05  3.899937e+06
## 51: ercot2009               Wind -8.996205e+06  3.954388e+06  6.893410e+06
## 52:  miso2018            Uranium -5.872562e+07 -3.902225e+06  8.500041e+06
## 53: ercot2018               Wind -1.336940e+07  7.459220e+06  1.126311e+07
## 54: ercot2018            Uranium  1.873722e+07  2.031992e+07  2.146358e+07
## 55: ercot2009               Coal -6.244536e+06  3.895402e+06  3.295559e+07
## 56: ercot2009            Uranium  1.022221e+08  1.055242e+08  1.151509e+08
## 57:    ny2018            Uranium  6.332782e+07  1.081154e+08  1.281600e+08
## 58:   pjm2009            Uranium -3.370426e+08  9.841017e+07  1.413137e+08
## 59:    ne2009            Uranium  1.243555e+08  1.483576e+08  1.723598e+08
## 60:    ne2018            Uranium  9.748078e+07  1.517621e+08  2.060434e+08
##       isoyear           fueltype          Min.       1st Qu.        Median
##              Mean       3rd Qu.          Max.
##  1: -1.177955e+07 -1.255534e+07  9.391021e+06
##  2: -9.484659e+06 -5.111053e+04  1.757718e+07
##  3: -5.320632e+06 -3.646615e+06 -3.235789e+06
##  4: -6.508987e+06 -1.429514e+05  4.120512e+07
##  5: -7.509362e+06 -3.511745e+06 -3.420680e+06
##  6: -3.562078e+06  1.922418e+05  3.535740e+07
##  7: -1.384892e+06 -5.213829e+05 -6.605799e+04
##  8: -7.307365e+05 -2.089186e+05  2.109046e+06
##  9: -7.229303e+05  1.895849e+05  8.998493e+06
## 10: -7.108134e+06 -1.697512e+05  1.006530e+07
## 11: -1.359839e+06 -7.716522e+04  1.513870e+07
## 12: -1.254856e+06  5.058719e+05  1.518517e+06
## 13: -2.625174e+06 -4.149038e+05 -3.788333e+05
## 14: -9.234570e+05 -9.024320e+04  3.963736e+07
## 15: -4.420651e+05 -3.155872e+05 -1.710226e+05
## 16: -1.969158e+06 -7.967143e+04  1.858639e+05
## 17: -3.980902e+05  4.755860e+05  5.729873e+06
## 18:  8.037444e+05  1.342012e+06  6.362698e+06
## 19: -3.738711e+05 -1.358394e+04  1.215851e+07
## 20:  5.410997e+05  9.172621e+05  1.847529e+07
## 21: -7.140605e+05 -6.331243e+04  4.883304e+06
## 22: -1.024793e+06 -3.736077e+04  7.200114e+04
## 23: -6.660928e+06 -9.925572e+04  4.331648e+05
## 24:  1.284918e+05  3.360602e+05  1.104343e+06
## 25:  1.286511e+06  7.008571e+06  1.905924e+07
## 26: -3.501490e+04 -2.877948e+04 -1.150760e+04
## 27: -2.518587e+05 -8.656400e+03 -2.624220e+03
## 28: -7.172407e+04 -2.109322e+04 -2.185800e+02
## 29: -1.914953e+04 -1.341043e+04 -1.043704e+04
## 30: -8.149126e+05 -6.081720e+03  5.088046e+06
## 31: -5.313600e+03 -5.313600e+03 -5.313600e+03
## 32: -7.198642e+02 -7.198642e+02 -7.198642e+02
## 33: -2.119714e+03  1.244936e+04  9.062729e+04
## 34:  1.753170e+04  3.945172e+04  2.408739e+05
## 35:  8.790478e+05  1.285742e+06  4.908981e+06
## 36:  8.083694e+05  1.240498e+06  1.016306e+07
## 37:  1.966999e+05  1.871868e+05  2.659687e+06
## 38:  4.902949e+04  7.884339e+04  1.133586e+05
## 39:  1.907475e+05  2.435680e+05  1.288100e+06
## 40:  1.658528e+06  2.310084e+05  1.878319e+07
## 41:  9.293738e+05  1.577404e+06  6.761258e+06
## 42:  5.497030e+05  6.154508e+05  6.150702e+06
## 43:  2.665109e+06  4.280077e+06  3.143425e+07
## 44:  1.294523e+07  1.987515e+07  5.254526e+07
## 45:  6.649952e+05  9.787528e+05  1.292510e+06
## 46:  1.165813e+06  3.108915e+06  4.417337e+06
## 47:  4.264082e+06  8.523644e+06  7.691365e+07
## 48:  1.522170e+06  1.522170e+06  1.522170e+06
## 49:  2.527348e+06  4.677997e+06  1.752654e+07
## 50:  4.527802e+06  7.791282e+06  1.136792e+07
## 51:  6.153045e+06  8.328568e+06  1.420847e+07
## 52:  3.901098e+06  1.323609e+07  8.430470e+07
## 53:  1.074653e+07  1.420984e+07  2.208771e+07
## 54:  2.350989e+07  2.465355e+07  3.237519e+07
## 55:  3.187119e+07  4.400963e+07  8.794229e+07
## 56:  1.159079e+08  1.255347e+08  1.311078e+08
## 57:  1.303426e+08  1.510570e+08  2.022848e+08
## 58:  9.797060e+07  1.618347e+08  1.786221e+08
## 59:  1.738075e+08  1.985335e+08  2.247071e+08
## 60:  1.705210e+08  2.070411e+08  2.080389e+08
##              Mean       3rd Qu.          Max.
  1. (Extra credit) Now we will repeat the same exercise but with social cost instead of private cost.
  1. We will use $50 for the social cost of carbon, but write your code using it as a parameter such that you can easily change it (Define scc as a variable at the beginning, and use scc instead of the actual value in the code. ).
scc <- 10
  1. Compute the social variable cost for each generation. First, combine heat rate, heat input, and CO2 emissions rate to obtain CO2 tons emitted per MWh. In the data we have the emission rates in lbs/MMBTU for CO2. We also have the heat input in MMBTU. Therefore, by multiplying both variables we can construct the emission in lbs. Then, multiplying this variable by a constant k = 0.00045359237 we can convert from lbs to tons. Finally, we know the generation in MWh. So, we can calculate the emissions in tons per MWh. CO2 Emissions ratei (tons/MWH) = Emission ratei (lbs/MMBTU) · Heat inputi (MMBTU) · k (ton/lbs) Generationi (M W h)
#dt[,tco2_mwh := co2 * 0.00045359237 * heat_input/net_generation] # CO2 is now tco2/MWh
# Alternatively you could do this
dt[,tco2_mwh := co2 * 0.00045359237 * heat_rate/1000] # heat rate is btu/kwh 
dt[is.na(tco2_mwh), tco2_mwh:=0]
dt[, external_cost_co2  := tco2_mwh*(scc)] # $/MWh since scc is $/tco2, and co2 is tco2/Mwh
dt[, social_cost := vcost_om + external_cost_co2]
  1. Plot the social cost supply curve and load. Find the price that would clear if load was equal to average load (call this P S ), and the corresponding quantity.
# Check what the scatter plot looks like 
ggplot(dt) + geom_point(aes(x = social_cost, y = vcost_om))

# Note that the supply curve will change the order of plants in our supply curve. So we have to recalculate cumulative capacity. 

# Order by social cost
setorder(dt, social_cost)
# Cumulative capacity in the social cost order
dt[, cum_capacity_sc := cumsum(capacity), isoyear]

scscs <- list() # Social cost supply curves
for (name in names(alldata)){
  scscs[[name]] <-ggplot(dt[isoyear == name], aes(x = cum_capacity_sc, y = social_cost, color = fueltype)) + 
    geom_point() + 
    geom_point(aes(x = cum_capacity, y = vcost_om), alpha = 0.5, size = 0.5,shape = 0) + # I plot the private costs below it
    geom_vline(xintercept = load[isoyear == name, avg_load]) +
    xlab("Cumulative Summer Capacity (MW)")+ 
    ylab("Social Cost ($/MWh)")+
    ggtitle(paste(name, "Supply Curve"))
}
scscs
## $ercot2018

## 
## $ercot2009

## 
## $ne2009

## 
## $miso2018

## 
## $ne2018

## 
## $ny2018

## 
## $pjm2009

dt_price2 <- merge(dt_price,dt[cum_capacity_sc>avg_load, .(p_s = min(social_cost)), isoyear])
dt_price2[, .(isoyear, p_avg_load, p_s)]
##      isoyear p_avg_load      p_s
## 1: ercot2009   29.99000 34.32041
## 2: ercot2018   24.56000 29.65410
## 3:  miso2018   28.34000 37.06770
## 4:    ne2009   41.64609 45.73754
## 5:    ne2018   44.67000 48.77713
## 6:    ny2018   44.79000 52.16582
## 7:   pjm2009   35.83000 41.68350
dt <- merge(dt, dt_price2[,.(isoyear, p_s)], by = 'isoyear')
  1. (Extra credit) Now we will compare total emissions in a world that dispatches plants according to private variable cost (as it is today), with a world in which plants are dispatched according to social cost (which would happen with a tax on carbon, for example).
  1. Calculate total emissions during an average hour assuming firms are dispatched according to social cost. For this, you have to follow these steps:
  1. Create variable that indicates whether a plant operates or not in an average hour (it operates if its social cost is lower than or equal to PS).
  2. Create another variable with each plant’s emissions assuming they produce at capacity (why capacity? Think of the supply curve).
  3. Then add up emissions for all plants that operate when the price is lower than or equal to PS.
# You can actually do this in one line
part9 <- dt[social_cost<p_s, .(co2ph_social = sum(tco2_mwh*capacity)), isoyear] # co2 is now tco2/mwh --> tco2/h

# Rx (as prescribed)
# i 
dt[, emits := 0]
dt[social_cost<p_s, emits := 1]
# ii 
dt[, emissions := tco2_mwh*capacity]
# iii (compares both methods)
merge(part9, dt[, .(co2ph_social = sum(emits*emissions, na.rm= T)), isoyear])
##      isoyear co2ph_social.x co2ph_social.y
## 1: ercot2009      15543.559      15543.559
## 2: ercot2018      12160.970      12160.970
## 3:  miso2018      53089.568      53089.568
## 4:    ne2009       3980.589       3980.589
## 5:    ne2018       4352.003       4352.003
## 6:    ny2018       4780.778       4780.778
## 7:   pjm2009      42186.813      42186.813
  1. Find total emissions for the case in which plants are dispatched according to private variable cost, not social cost, using the procedure above. Notice that now a plant operates if its variable cost is at or below the clearing price calculated in 5c.
# Compare co2 emissions per hour under different regimes
part9 <- merge(part9, dt[p_avg_load > vcost_om , .(co2ph_private = sum(tco2_mwh*capacity)), isoyear])
part9
##      isoyear co2ph_social co2ph_private
## 1: ercot2009    15543.559     16487.601
## 2: ercot2018    12160.970     16430.658
## 3:  miso2018    53089.568     58183.043
## 4:    ne2009     3980.589      3959.925
## 5:    ne2018     4352.003      4372.975
## 6:    ny2018     4780.778      4845.673
## 7:   pjm2009    42186.813     45556.965
  1. How many tons could we save in an hour if the social cost of carbon were internalized? And in a day? And in a year? Comment about how the fuel shares of this particular market affect the results of this exercise.

Depends on the market. Fuel shares by market basically determine how the merit order changes with the CO2 tax. Savings can be very large (e.g. ERCOT) or small (NEISO), depedning on how much fossil fuels that are marginal to thte tax.

part9[,co2_savings := co2ph_private-co2ph_social]
part9[,co2_savings_annual := co2_savings * 365]
part9
##      isoyear co2ph_social co2ph_private co2_savings co2_savings_annual
## 1: ercot2009    15543.559     16487.601   944.04207         344575.357
## 2: ercot2018    12160.970     16430.658  4269.68815        1558436.174
## 3:  miso2018    53089.568     58183.043  5093.47491        1859118.341
## 4:    ne2009     3980.589      3959.925   -20.66359          -7542.209
## 5:    ne2018     4352.003      4372.975    20.97195           7654.762
## 6:    ny2018     4780.778      4845.673    64.89461          23686.531
## 7:   pjm2009    42186.813     45556.965  3370.15201        1230105.485
  1. (Bonus) How does the above answer change with different values for the social cost of carbon?

There should be more CO2 savings for a higher SCC. The larger the tax, the more fossil fuel plants that will be marginal. (e) (Extra bonus) Can you write a function that helps you to compute this?

NY and NE ISO have weird non-monotonically increasing results

co2_savings <- function(scc, dt = dt){
  dt[, social_cost := vcost_om + tco2_mwh*scc]
  setorder(dt, social_cost)
  dt[, cum_capacity_sc := cumsum(capacity), isoyear]
  dt[, p_s := NULL]
  dt <- merge(dt, dt[cum_capacity_sc>avg_load, .(p_s = min(social_cost)), isoyear])
  dcast(dt[social_cost<p_s, .(co2ph = sum(tco2_mwh*capacity, na.rm= T), scc = paste0('co2ph',scc)), isoyear],"isoyear ~ scc", value.var = "co2ph")
}

emissions <- Reduce(merge, lapply(0:20*2, co2_savings, dt = dt))
## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove

## Warning in `[.data.table`(dt, , `:=`(p_s, NULL)): Column 'p_s' does not exist to
## remove
emissions
##      isoyear    co2ph0    co2ph2    co2ph4    co2ph6    co2ph8   co2ph10
## 1: ercot2009 16487.601 16985.324 16985.324 16551.190 16505.163 15543.559
## 2: ercot2018 16430.658 16094.676 15678.049 14856.234 13913.969 12160.970
## 3:  miso2018 58183.043 56997.807 56617.248 56507.793 53893.906 53089.568
## 4:    ne2009  3959.925  4050.878  3978.124  3980.589  3980.589  3980.589
## 5:    ne2018  4372.975  4370.367  4370.367  4358.465  4352.003  4352.003
## 6:    ny2018  4845.673  4973.151  4885.903  4748.900  4780.778  4780.778
## 7:   pjm2009 45556.965 44683.344 43964.710 43205.871 42889.074 42186.813
##      co2ph12   co2ph14   co2ph16   co2ph18   co2ph20   co2ph22   co2ph24
## 1: 15442.036 15448.394 15448.394 15430.688 15081.377 15081.377 14400.887
## 2: 12160.970 12160.970 12175.256 10965.655 10986.418 10854.748 10854.748
## 3: 52895.175 52116.732 51441.761 50661.959 49512.744 49480.598 49167.256
## 4:  3980.589  3980.589  3980.589  3980.589  3980.589  3983.117  4010.092
## 5:  4352.003  4347.123  3992.430  3992.430  3990.714  3990.714  3975.101
## 6:  4780.778  4774.763  4774.763  4165.309  4207.152  4207.152  4280.612
## 7: 41817.690 41339.784 40914.417 40922.506 40192.737 40200.857 38956.328
##      co2ph26   co2ph28   co2ph30   co2ph32   co2ph34   co2ph36   co2ph38
## 1: 13517.085 12702.163 12449.599 12449.599 11638.664 11187.026 11232.500
## 2: 10898.490 10898.490 10833.688 10833.688 10833.688 10810.153 10810.153
## 3: 49082.036 48705.066 48262.818 47885.203 47811.302 47697.214 47031.216
## 4:  4007.681  4007.681  4007.681  4007.681  4002.998  3788.997  3790.458
## 5:  3975.101  3975.101  3975.101  3969.266  3969.266  3965.017  3965.017
## 6:  4403.202  4433.166  4477.224  4427.151  4403.202  4403.202  4403.202
## 7: 38671.179 38435.868 37849.771 38009.577 37291.301 36400.901 36401.802
##      co2ph40
## 1: 10846.519
## 2: 10810.153
## 3: 47118.925
## 4:  3790.458
## 5:  3965.017
## 6:  4403.202
## 7: 36454.271
# Savings
savings <- emissions[, .(isoyear, co2ph0 - .SD), .SDcols = names(emissions)[2:length(names(emissions))]]
savings
##      isoyear co2ph0      co2ph2      co2ph4     co2ph6     co2ph8    co2ph10
## 1: ercot2009      0 -497.723139 -497.723139  -63.58836  -17.56211  944.04207
## 2: ercot2018      0  335.982184  752.608723 1574.42397 2516.68879 4269.68815
## 3:  miso2018      0 1185.236086 1565.794757 1675.24987 4289.13692 5093.47491
## 4:    ne2009      0  -90.953010  -18.199217  -20.66359  -20.66359  -20.66359
## 5:    ne2018      0    2.608338    2.608338   14.51038   20.97195   20.97195
## 6:    ny2018      0 -127.478023  -40.230142   96.77207   64.89461   64.89461
## 7:   pjm2009      0  873.620992 1592.255153 2351.09393 2667.89045 3370.15201
##       co2ph12    co2ph14    co2ph16    co2ph18    co2ph20    co2ph22    co2ph24
## 1: 1045.56497 1039.20768 1039.20768 1056.91303 1406.22413 1406.22413 2086.71380
## 2: 4269.68815 4269.68815 4255.40218 5465.00304 5444.24052 5575.90961 5575.90961
## 3: 5287.86736 6066.31080 6741.28131 7521.08332 8670.29867 8702.44468 9015.78706
## 4:  -20.66359  -20.66359  -20.66359  -20.66359  -20.66359  -23.19198  -50.16752
## 5:   20.97195   25.85163  380.54484  380.54484  382.26129  382.26129  397.87403
## 6:   64.89461   70.90943   70.90943  680.36364  638.52105  638.52105  565.06069
## 7: 3739.27479 4217.18071 4642.54793 4634.45842 5364.22753 5356.10809 6600.63709
##       co2ph26    co2ph28    co2ph30     co2ph32     co2ph34    co2ph36
## 1: 2970.51664 3785.43794 4038.00234  4038.00234  4848.93681  5300.5750
## 2: 5532.16790 5532.16790 5596.97034  5596.97034  5596.97034  5620.5052
## 3: 9101.00658 9477.97678 9920.22475 10297.83930 10371.74093 10485.8285
## 4:  -47.75584  -47.75584  -47.75584   -47.75584   -43.07263   170.9277
## 5:  397.87403  397.87403  397.87403   403.70945   403.70945   407.9583
## 6:  442.47020  412.50637  368.44816   418.52119   442.47020   442.4702
## 7: 6885.78613 7121.09636 7707.19371  7547.38792  8265.66391  9156.0642
##       co2ph38    co2ph40
## 1:  5255.1016  5641.0823
## 2:  5620.5052  5620.5052
## 3: 11151.8269 11064.1171
## 4:   169.4675   169.4675
## 5:   407.9583   407.9583
## 6:   442.4702   442.4702
## 7:  9155.1632  9102.6935